import csv
import json
import re


def deal_line(content, key_names):
    """Pull one text field out of a JSON line, keeping it only if pure ASCII.

    Args:
        content: Raw JSON text of a single record. A zero-length value
            short-circuits and yields "".
        key_names: Sequence of keys; only the first entry is looked up in
            the decoded object.

    Returns:
        The value stored under ``key_names[0]``, or "" when the input is
        empty or the value contains any character outside the 7-bit ASCII
        range.
    """
    if not len(content):
        return ""

    value = json.loads(content)[key_names[0]]

    # Any run of characters above 0x7F disqualifies the whole value.
    has_non_ascii = re.search(r'[^\x00-\x7F]+', value) is not None
    return "" if has_non_ascii else value


# Sample neural-search request used to demonstrate extracting the query text.
json_str = '{"query": {"neural": {"body_knn": {"query_text": "女演员每小时能赚多少钱", "model_id": "AkJOl4kBsArIVhd-7wdN"}}}}'

data = json.loads(json_str)
query_text = data['query']['neural']['body_knn']['query_text']

# The csv module requires newline='' so it controls line endings itself, and
# the explicit UTF-8 encoding keeps the non-ASCII query text from failing on
# platforms whose default locale encoding cannot represent it.
with open('输出.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow([query_text])

# Convert a JSON-lines file of neural-search requests into a one-column text
# file: a "text" header followed by one query string per line.
#
# Alternative input: "/Users/wjunshen/Downloads/queries.jsonl"
inputName = "/Users/wjunshen/Downloads/queries-ms-marco.json"
startLine = 0        # zero-based index of the first input line to convert
lineCount = 100000   # maximum number of input lines to consider
outputName = "/Users/wjunshen/Downloads/queries.csv"
# Alternative key configuration: keyNames = ["text"]
key_name = "query_text"

# Both files are managed by `with` so they are closed even if a line fails to
# parse. The output is opened first, as before, so it is truncated and given
# its header regardless of what the input contains.
with open(outputName, 'w', newline='', encoding="utf-8") as output_file:
    output_file.write("text\n")

    with open(inputName, 'r', encoding="utf-8") as f:
        for line_no, line in enumerate(f):
            if line_no < startLine:
                # Skip everything before the requested window.
                continue
            if line_no >= startLine + lineCount:
                break
            if not line.strip():
                # Blank lines carry no record and would make json.loads raise.
                continue

            data = json.loads(line)
            content = data['query']['neural']['body_knn'][key_name]

            print(content + "\n")
            # NOTE(review): the value is written raw, not CSV-quoted; a query
            # containing a comma or quote will not round-trip through a CSV
            # reader. Left unchanged to preserve the existing output format.
            output_file.write(content + '\n')
